sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.35 R6_2.5.1 fastmap_1.1.1 xfun_0.43
## [5] cachem_1.0.8 knitr_1.45 htmltools_0.5.8.1 rmarkdown_2.26
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.9 jquerylib_0.1.4
## [13] compiler_4.3.2 rstudioapi_0.16.0 tools_4.3.2 evaluate_0.23
## [17] bslib_0.7.0 yaml_2.3.8 rlang_1.1.3 jsonlite_1.8.8
#BiocManager::install("simplifyEnrichment")
First, load the necessary packages.
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tidyr)
library(grDevices)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
##
## rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:geneLenDataBase':
##
## unfactor
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
##
## addNode
## The following object is masked from 'package:circlize':
##
## degree
## The following object is masked from 'package:plyr':
##
## join
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## The following objects are masked from 'package:zoo':
##
## yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
library(simplifyEnrichment)
##
## ========================================
## simplifyEnrichment version 1.10.0
## Bioconductor page: https://bioconductor.org/packages/simplifyEnrichment/
## Github page: https://github.com/jokergoo/simplifyEnrichment
## Documentation: https://jokergoo.github.io/simplifyEnrichment/
## Examples: https://simplifyenrichment.github.io/
##
## If you use it in published research, please cite:
## Gu, Z. simplifyEnrichment: an R/Bioconductor package for Clustering and
## Visualizing Functional Enrichment Results, Genomics, Proteomics &
## Bioinformatics 2022.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(simplifyEnrichment))
## ========================================
library(rtracklayer)
gff<-rtracklayer::import("../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3")
gff<-as.data.frame(gff) %>% dplyr::select(-Parent)
dim(gff) # 478988 9
## [1] 478988 12
names(gff)
## [1] "seqnames" "start" "end" "width"
## [5] "strand" "source" "type" "score"
## [9] "phase" "ID" "transcript_id" "gene_id"
transcripts <- subset(gff, type == "transcript")
transcripts_gr <- makeGRangesFromDataFrame(transcripts, keep.extra.columns=TRUE) #extract length information
transcript_lengths <- width(transcripts_gr) #isolate length of each gene
seqnames<-transcripts_gr$ID #extract list of gene id
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame
dim(transcripts) #33730 13
## [1] 33730 12
kegg <- read.delim("../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt",header = FALSE)
kegg <- as.data.frame(kegg)
colnames(kegg)[1] <- "gene_id"
colnames(kegg)[2] <- "KEGG_new"
head(kegg)
## gene_id KEGG_new
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b K22584
## 3 Pocillopora_acuta_HIv2___RNAseq.g22918.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 K03386
## 5 Pocillopora_acuta_HIv2___RNAseq.g7985.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13511.t1
eggnog<-read.delim("../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt")#this file contains all of the go terms, descriptions, kegg, etc
eggnog<- plyr::rename(eggnog, c("X.query"="gene_id"))
head(eggnog,2)
## gene_id seed_ortholog evalue score
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a 45351.EDO48725 2.16e-120 364
## 2 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 45351.EDO38694 3.18e-123 355
## eggNOG_OGs
## 1 COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2 COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
## max_annot_lvl COG_category
## 1 33208|Metazoa E
## 2 33208|Metazoa O
## Description Preferred_name
## 1 Cobalamin-independent synthase, Catalytic domain -
## 2 negative regulation of male germ cell proliferation PRDX4
## GOs
## 1 -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
## EC KEGG_ko
## 1 2.1.1.14 ko:K00549
## 2 1.11.1.15 ko:K03386
## KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2 ko04214,map04214
## KEGG_Module KEGG_Reaction KEGG_rclass
## 1 M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2 - - -
## BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000 - - -
## 2 ko00000,ko00001,ko01000,ko04147 - - -
## PFAMs
## 1 Meth_synt_2
## 2 1-cysPrx_C,AhpC-TSA
gogene <- merge(transcripts, eggnog, by=c("gene_id"), all=T)
gogene <- merge(gogene, kegg, by=c("gene_id"), all=T)
head(gogene,2)
## gene_id seqnames
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t Pocillopora_acuta_HIv2___Sc0000013
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t Pocillopora_acuta_HIv2___Sc0000013
## start end width strand source type score.x phase
## 1 4542087 4551503 9417 + AUGUSTUS transcript NA NA
## 2 4639103 4647350 8248 + AUGUSTUS transcript NA NA
## ID
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t
## transcript_id seed_ortholog evalue score.y
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t 45351.EDO27354 2.41e-93 317
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t 6087.XP_002166004.2 1.28e-38 164
## eggNOG_OGs
## 1 COG0666@1|root,KOG0510@2759|Eukaryota,38G7Q@33154|Opisthokonta,3BCDU@33208|Metazoa
## 2 COG0666@1|root,KOG4177@2759|Eukaryota
## max_annot_lvl COG_category Description
## 1 33208|Metazoa DZ osmolarity-sensing cation channel activity
## 2 2759|Eukaryota I spectrin binding
## Preferred_name
## 1 TRPA1
## 2 -
## GOs
## 1 GO:0000302,GO:0001580,GO:0002791,GO:0002793,GO:0003008,GO:0003012,GO:0003674,GO:0004888,GO:0005034,GO:0005215,GO:0005216,GO:0005217,GO:0005244,GO:0005245,GO:0005261,GO:0005262,GO:0005488,GO:0005515,GO:0005575,GO:0005623,GO:0005886,GO:0005887,GO:0006810,GO:0006811,GO:0006812,GO:0006816,GO:0006873,GO:0006874,GO:0006875,GO:0006936,GO:0006939,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007166,GO:0007204,GO:0007600,GO:0007602,GO:0007606,GO:0007610,GO:0007638,GO:0008150,GO:0008324,GO:0009266,GO:0009314,GO:0009408,GO:0009409,GO:0009410,GO:0009416,GO:0009453,GO:0009581,GO:0009582,GO:0009583,GO:0009593,GO:0009605,GO:0009612,GO:0009628,GO:0009636,GO:0009719,GO:0009966,GO:0009967,GO:0009987,GO:0010033,GO:0010035,GO:0010037,GO:0010243,GO:0010378,GO:0010646,GO:0010647,GO:0010817,GO:0014070,GO:0014074,GO:0014832,GO:0014848,GO:0015075,GO:0015085,GO:0015267,GO:0015276,GO:0015278,GO:0015318,GO:0016020,GO:0016021,GO:0016043,GO:0016048,GO:0016324,GO:0019233,GO:0019722,GO:0019725,GO:0019932,GO:0022607,GO:0022803,GO:0022832,GO:0022834,GO:0022836,GO:0022838,GO:0022839,GO:0022843,GO:0022857,GO:0022890,GO:0023041,GO:0023051,GO:0023052,GO:0023056,GO:0030001,GO:0030003,GO:0030424,GO:0031000,GO:0031224,GO:0031226,GO:0031644,GO:0031646,GO:0032024,GO:0032421,GO:0032501,GO:0032879,GO:0032880,GO:0032991,GO:0033554,GO:0033555,GO:0034220,GO:0034605,GO:0034702,GO:0034703,GO:0035556,GO:0035690,GO:0035774,GO:0036270,GO:0038023,GO:0040011,GO:0040040,GO:0042221,GO:0042330,GO:0042331,GO:0042391,GO:0042493,GO:0042542,GO:0042592,GO:0042752,GO:0042802,GO:0042995,GO:0043005,GO:0043052,GO:0043269,GO:0043270,GO:0043279,GO:0043933,GO:0044057,GO:0044070,GO:0044085,GO:0044425,GO:0044459,GO:0044464,GO:0045177,GO:0046677,GO:0046873,GO:0046883,GO:0046887,GO:0046957,GO:0048265,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048878,GO:0050708,GO:0050714,GO:0050789,GO:0050794,GO:0050796,GO:0050801,GO:0050848,GO:0050850,GO:0050877,GO:0050896,GO:0050906,GO:0050907,GO:0050909,GO:0050912,GO:0050913,GO:0050951,GO:0050954,GO:0050955,GO:0050960,GO:0050961,GO:0050965,GO:0050966,GO:0050968,GO:0050974,GO:0050982,GO:0051046,GO:0051047,GO:0051049,GO:0051050,GO:0051179,GO:0051209,GO:0051222,GO:0051223,GO:0051234,GO:0051239,GO:0051240,GO:0051259,GO:0051260,GO:0051262,GO:0051282,GO:0051283,GO:0051289,GO:0051480,GO:0051606,GO:0051641,GO:0051649,GO:0051716,GO:0051930,GO:0051931,GO:0051969,GO:0052129,GO:0055065,GO:0055074,GO:0055080,GO:0055082,GO:0055085,GO:0060089,GO:0060341,GO:0060401,GO:0060402,GO:0061178,GO:0065003,GO:0065007,GO:0065008,GO:0070201,GO:0070417,GO:0070588,GO:0070838,GO:0070887,GO:0071241,GO:0071244,GO:0071310,GO:0071312,GO:0071313,GO:0071407,GO:0071415,GO:0071417,GO:0071466,GO:0071495,GO:0071840,GO:0071944,GO:0072347,GO:0072503,GO:0072507,GO:0072511,GO:0090087,GO:0090276,GO:0090277,GO:0097458,GO:0097553,GO:0097603,GO:0097604,GO:0098590,GO:0098655,GO:0098660,GO:0098662,GO:0098771,GO:0098796,GO:0098862,GO:0098900,GO:0098908,GO:0099094,GO:0099604,GO:0120025,GO:1901698,GO:1901699,GO:1901700,GO:1901701,GO:1902495,GO:1902531,GO:1902533,GO:1903522,GO:1903530,GO:1903532,GO:1903793,GO:1904058,GO:1904951,GO:1990351,GO:1990760
## 2 -
## EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass
## 1 - ko:K04984 ko04750,map04750 - - -
## 2 - - - - - -
## BRITE KEGG_TC CAZy
## 1 ko00000,ko00001,ko04040 1.A.4.6.1,1.A.4.6.2,1.A.4.6.3,1.A.4.6.5 -
## 2 - - -
## BiGG_Reaction PFAMs KEGG_new
## 1 - Ank,Ank_2,Ank_3,Ank_4,Ion_trans K04984
## 2 - Ank,Ank_2,Ank_4,Ank_5,Ion_trans K04984
dim(gogene)
## [1] 33730 33
geneInfo <- read.csv("../../output/WGCNA/WGCNA_ModuleMembership.csv") #this file was generated from the WGCNA analyses and contains the modules of interest
geneInfo<- plyr::rename(geneInfo, c("X"="gene_id"))
dim(geneInfo) # there are 9012 genes in our gene info file
## [1] 9012 40
geneInfo <- merge(gogene, geneInfo, by=c("gene_id")) #merging the GO and Kegg info to module membership for the 9012 genes
Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in GOs column
geneInfo$GOs <- gsub(",", ";", geneInfo$GOs)
geneInfo$GOs <- gsub('"', "", geneInfo$GOs)
geneInfo$GOs <- gsub("-", NA, geneInfo$GOs)
geneInfo$KEGG_new[geneInfo$KEGG_new == ""] <- NA
unique(geneInfo$moduleColor)
## [1] "green" "blue" "salmon" "turquoise" "yellow"
## [6] "black" "red" "magenta" "lightcyan" "purple"
## [11] "brown" "pink" "midnightblue" "tan" "cyan"
geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)]
dim(geneInfo)
## [1] 9012 73
write.csv(geneInfo, file = "../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv") #gene info for reference/supplement
rrvgo package to
reduce redundancy in list of GO terms.### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
calc_up_mods <- c("brown", "red", "black", "pink", "salmon", "blue")
nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")) #942
## [1] 942
nrow(geneInfo %>% dplyr::filter(moduleColor=="red")) #425
## [1] 425
nrow(geneInfo %>% filter(moduleColor=="black")) #396
## [1] 396
nrow(geneInfo %>% filter(moduleColor=="pink")) #220
## [1] 220
nrow(geneInfo %>% filter(moduleColor=="salmon")) #154
## [1] 154
nrow(geneInfo %>% filter(moduleColor=="blue")) #1989
## [1] 1989
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")), nrow(geneInfo %>% dplyr::filter(moduleColor=="red")), nrow(geneInfo %>% filter(moduleColor=="black")), nrow(geneInfo %>% filter(moduleColor=="pink")), nrow(geneInfo %>% filter(moduleColor=="salmon")), nrow(geneInfo %>% filter(moduleColor=="blue")))
## [1] 4126
# 4126
calc_down_mods <- c("turquoise","magenta","lightcyan")
nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")) #2558
## [1] 2558
nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")) #219
## [1] 219
nrow(geneInfo %>% filter(moduleColor=="lightcyan")) #65
## [1] 65
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")), nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")), nrow(geneInfo %>% filter(moduleColor=="lightcyan")))
## [1] 2842
# 2842
other_mods <- c("green","yellow", "purple", "midnightblue","cyan","tan")
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="green")), nrow(geneInfo %>% dplyr::filter(moduleColor=="yellow")), nrow(geneInfo %>% filter(moduleColor=="purple")), nrow(geneInfo %>% filter(moduleColor=="midnightblue")), nrow(geneInfo %>% filter(moduleColor=="cyan")),nrow(geneInfo %>% filter(moduleColor=="tan")))
## [1] 2044
# 2044
# 4126 + 2842 + 2044 = 9012, which represents all of our genes
4126 genes are in the 6 modules significantly upregulated by calcification.
### Generate vector with names in just the module we are analyzing
# ID.vector <- geneInfo %>%
# filter(moduleColor==c("brown", "red", "black", "pink", "salmon", "green")) %>%
# #get_rows(.data[[module]]))%>%
# pull(gene_id)
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("brown", "red", "black", "pink", "salmon", "blue")) %>%
pull(gene_id)
length(ID.vector) #4126
## [1] 4126
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
GO_05 <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
go_results_BP <- GO_05 %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 654 8
head(go_results_BP)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0050794 8.436918e-08 1.0000000 1395
## 2 2 GO:0065007 1.553493e-06 0.9999989 1574
## 3 3 GO:0050789 1.584882e-06 0.9999989 1479
## 4 4 GO:0048523 1.581326e-05 0.9999878 800
## 5 5 GO:0048518 3.043369e-05 0.9999758 950
## 6 6 GO:0051336 3.403563e-05 0.9999783 232
## numInCat term ontology
## 1 2785 regulation of cellular process BP
## 2 3184 biological regulation BP
## 3 2981 regulation of biological process BP
## 4 1569 negative regulation of cellular process BP
## 5 1888 positive regulation of biological process BP
## 6 415 regulation of hydrolase activity BP
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
#compare_clustering_methods(Mat, plot_type = "heatmap")
pdf(file = "../../output/WGCNA/GO_analysis/calc_up_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 654 terms by 'binary_cut'... 184 clusters, used 2.05056 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen
## 2
CalcUpMods_GO_P05 <- go_results_BP %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcUpMods_GO_P05
ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05.pdf", CalcUpMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 508 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_reduced$ParentTerm))
## [1] 73
The reduced list of terms is 508 terms that falls under 73 parent terms.
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
Plot reduced terms
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_calcification <- ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot_calcification
ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification.pdf", plot=GO.plot_calcification, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcUpMods_GO_P05_reduced <- go_results_BP_reduced %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcUpMods_GO_P05_reduced
ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05_reduced.pdf", CalcUpMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
2842 genes are in the 4 modules significantly downregulated by calcification.
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
### Generate vector with names in just the module we are analyzing
ID.vector <- geneInfo%>%
filter(moduleColor %in% c("turquoise","magenta","lightcyan"))%>%
pull(gene_id)
length(ID.vector) #2842
## [1] 2842
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
GO_05 <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
go_results_BP <- GO_05 %>%
filter(ontology=="BP") %>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat > 10) %>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 298 8
head(go_results_BP)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0002181 8.295294e-15 1 67
## 2 2 GO:0006614 2.463649e-13 1 48
## 3 3 GO:0006412 3.096109e-13 1 123
## 4 4 GO:0043043 1.598313e-12 1 126
## 5 5 GO:0006613 2.273026e-12 1 49
## 6 6 GO:0006518 7.543937e-12 1 146
## numInCat term ontology
## 1 90 cytoplasmic translation BP
## 2 59 SRP-dependent cotranslational protein targeting to membrane BP
## 3 219 translation BP
## 4 231 peptide biosynthetic process BP
## 5 63 cotranslational protein targeting to membrane BP
## 6 285 peptide metabolic process BP
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/calc_down_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 298 terms by 'binary_cut'... 87 clusters, used 0.521641 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 4 GO lists...
dev.off()
## quartz_off_screen
## 2
CalcDownMods_GO_P05 <- go_results_BP %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcDownMods_GO_P05
ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05.pdf", CalcDownMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 226 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_reduced$ParentTerm))
## [1] 38
The reduced list of terms is 226 terms that falls under 38 parent terms.
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_calcification_down <- ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot_calcification_down
ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification_down.pdf", plot=GO.plot_calcification_down, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcDownMods_GO_P05_reduced <- go_results_BP_reduced %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcDownMods_GO_P05_reduced
ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05_reduced.pdf", CalcDownMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
# Define the unique module colors
module_colors <- na.omit(unique(geneInfo$moduleColor))
# Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
# Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
# Loop over each unique module color
for (color in module_colors) {
# Filter geneInfo based on the current color
color_filtered <- geneInfo %>% filter(moduleColor == color)
# Generate vector with names in just the module we are analyzing
ID.vector <- color_filtered$gene_id
length(ID.vector)
# Get a list of GO Terms for each module
GO.terms <- color_filtered %>%
dplyr::select(GOs, gene_id) %>%
rename(GOs = "GO.terms")
dim(GO.terms)
## Format to have one GO term per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene_id, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms <- split2
## Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector <- as.integer(ALL.vector %in% ID.vector)
names(gene.vector) <- ALL.vector # set names
# Weight gene vector by bias for length of gene
pwf <- nullp(gene.vector, ID.vector, bias.data = LENGTH.vector)
# Run goseq using Wallenius method for all categories of GO terms
GO.wall <- goseq(pwf, ID.vector, gene2cat = GO.terms, test.cats = c("GO:BP", "GO:MF", "GO:CC"), method = "Wallenius", use_genes_without_cat = TRUE)
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
# Adjust p-values
GO$bh_adjust <- p.adjust(GO$over_represented_pvalue, method = "BH")
# Filtering for p < 0.01
GO <- GO %>%
dplyr::filter(bh_adjust < 0.00001) %>%
dplyr::arrange(., ontology, bh_adjust)
# Write file of results
write.csv(GO, file = paste0("../../output/WGCNA/GO_analysis/goseq_pattern_", color, ".csv"))
go_results <- GO
go_results<-go_results%>%
filter(ontology=="BP")%>%
filter(bh_adjust != "NA") %>%
#filter(numInCat>100)%>%
arrange(., bh_adjust)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
#calculate similarity
scores <- setNames(-log(go_results$bh_adjust), go_results$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
#keep only the goterms from the reduced list
go_results <- go_results %>% filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot <- ggplot(go_results, aes(x = ontology, y = term)) +
geom_point(aes(size=bh_adjust)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot
length(colnames(go_results)[go_results$ParentTerm=="cation transport"])
length(colnames(go_results)[go_results$ParentTerm=="inorganic ion homeostasis"])
length(colnames(go_results)[go_results$ParentTerm=="regulation of cellular response to stress"])
}
GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_05_up %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP_up)
## [1] 654 7
go_results_BP_down <- GO_05_down %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP_down)
## [1] 298 7
all_enriched_terms_up_down <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm)
length(all_enriched_terms_up_down)
## [1] 952
length(unique(all_enriched_terms_up_down))
## [1] 947
By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules.
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down),
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
reducedTerms <- reduceSimMatrix(simMatrix,
scores = "size",
threshold=0.7,
orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 730 10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 78
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 43
#write.csv(go_results_BP_up_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>% mutate(Factor = "Up")
head(cal_up_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0050794 8.436918e-08 1.0000000 1395
## 2 GO:0065007 1.553493e-06 0.9999989 1574
## 3 GO:0050789 1.584882e-06 0.9999989 1479
## 4 GO:0048523 1.581326e-05 0.9999878 800
## 5 GO:0048518 3.043369e-05 0.9999758 950
## 6 GO:0051336 3.403563e-05 0.9999783 232
## numInCat term ontology
## 1 2785 regulation of cellular process BP
## 2 3184 biological regulation BP
## 3 2981 regulation of biological process BP
## 4 1569 negative regulation of cellular process BP
## 5 1888 positive regulation of biological process BP
## 6 415 regulation of hydrolase activity BP
## ParentTerm Factor
## 1 regulation of macromolecule metabolic process Up
## 2 regulation of macromolecule metabolic process Up
## 3 regulation of macromolecule metabolic process Up
## 4 negative regulation of biosynthetic process Up
## 5 regulation of macromolecule metabolic process Up
## 6 regulation of catalytic activity Up
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
head(cal_down_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0002181 8.295294e-15 1 67
## 2 GO:0006614 2.463649e-13 1 48
## 3 GO:0006412 3.096109e-13 1 123
## 4 GO:0043043 1.598313e-12 1 126
## 5 GO:0006613 2.273026e-12 1 49
## 6 GO:0006518 7.543937e-12 1 146
## numInCat term ontology
## 1 90 cytoplasmic translation BP
## 2 59 SRP-dependent cotranslational protein targeting to membrane BP
## 3 219 translation BP
## 4 231 peptide biosynthetic process BP
## 5 63 cotranslational protein targeting to membrane BP
## 6 285 peptide metabolic process BP
## ParentTerm Factor
## 1 translation Down
## 2 protein transport Down
## 3 translation Down
## 4 translation Down
## 5 protein transport Down
## 6 translation Down
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated or downregulated for calcification. The list has been further reduced by using the package rrvgo.
all_terms <- rbind(cal_down_terms,cal_up_terms)
all_terms <- all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","ParentTerm")]
all_terms$GOterm<-as.factor(all_terms$GOterm)
dim(all_terms) #734 reduced terms
## [1] 734 9
length(unique(all_terms$GOterm)) #this represents 730 unique terms between the two lists of reduced terms
## [1] 730
length(unique(all_terms$ParentTerm)) #this represents 87 unique terms between the two lists of reduced terms
## [1] 87
This is collapsing the list from 734 rows to 730, representing the 730 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.
goterms_shared <- all_terms %>%
group_by(GOterm) %>%
dplyr::summarise(
ParentTerm = paste(unique(ParentTerm), collapse = ", "),
Factor = paste(unique(Factor), collapse = ", "),
term = paste(unique(term), collapse = ", ")
)
dim(goterms_shared)
## [1] 730 4
goterms_shared_full <- goterms_shared %>%
left_join(dplyr::select(all_terms,GOterm, Factor, over_represented_pvalue), by = "GOterm") %>% #select 3 columns GOterm, Factor, over_represented_pvalue from all_terms and left join by GOterm, turns this dataframe from 730 rows back to the 734 in all_terms
mutate(pvalue_Up = ifelse(Factor.y == "Up", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Up factor
mutate(pvalue_Down = ifelse(Factor.y == "Down", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Down factor
dplyr::select(-c("over_represented_pvalue", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
dplyr::summarize(pvalue_Up = na.omit(pvalue_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
pvalue_Down = na.omit(pvalue_Down)[1]) %>% #carry over the p-value for the term in the down direction, by taking the first non-NA value.
rename(Factor.x = "Factor") #rename column
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm.csv")
goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()
goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")
nrow(goterms_up)
## [1] 504
nrow(goterms_down)
## [1] 222
nrow(goterms_shared_only)
## [1] 4
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_shared_only) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 77
length(unique(goterms_down$ParentTerm))
## [1] 42
length(unique(goterms_shared_only$ParentTerm))
## [1] 2
freq_fig_up_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "red") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.04) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
guides(color = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
freq_fig_down_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "blue") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.04) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank())
compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs
ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Shared_ParentTerms.pdf", plot=compare_figs, dpi=300, height=3, units="in", limitsize=FALSE)
## Saving 7 x 3 in image
freq_fig_up_pval <- ggplot(goterms_up, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "red") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.05) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
guides(color = FALSE)
freq_fig_down_pval <- ggplot(goterms_down, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "blue") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.05) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank())
compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs
ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Unique_ParentTerms.pdf", plot=compare_figs, dpi=300, height=10, units="in", limitsize=FALSE)
## Saving 7 x 10 in image
I downloaded the GOSlim terms list from the following website on 4/5/2024: http://www.informatics.jax.org/gotools/data/input/map2MGIslim.txt
I did so by going to this link and right clicking “here” at “Tab-delimited file mapping GO_ids to MGI GO_Slim category available for download here” and saying “Save link as…” , and I saved it as “map2MGIslim.txt”. I then converted this to CSV and uploaded to this github repository here.
Add slim terms to dataset
GOslim <- read.csv("../../data/map2MGIslim.csv") %>% dplyr::select(-c(term,aspect))
goterms_shared_full_slim <- goterms_shared_full %>%
inner_join(GOslim, by = c("GOterm" = "GO_id"))
goterms_up_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Up" | Factor=="Down, Up")
goterms_up_full_slim_plot <- goterms_up_slim %>%
ggplot(aes(x=pvalue_Up,
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Over-represented p-value", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text.y = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
); goterms_up_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim.pdf", goterms_up_full_slim_plot, width = 5, height = 35)
goterms_up_full_slim_plot <- goterms_up_slim %>%
ggplot(aes(x=log10(pvalue_Up),
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Log 10(Over-represented p-value)", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text.y = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
); goterms_up_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim_Log.pdf", goterms_up_full_slim_plot, width = 5, height = 35)
goterms_down_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Down" | Factor=="Down, Up")
goterms_down_full_slim_plot <- goterms_down_slim %>%
ggplot(aes(x=pvalue_Down,
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Over-represented p-value", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
legend.text=element_text(size = 5),
legend.title = element_text(size = 5)
); goterms_down_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim.pdf", goterms_down_full_slim_plot, width = 5, height = 35)
goterms_down_full_slim_plot <- goterms_down_slim %>%
ggplot(aes(x=log10(pvalue_Down),
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Log 10(Over-represented p-value)", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
legend.text=element_text(size = 5),
legend.title = element_text(size = 5)
); goterms_down_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim_Log.pdf", goterms_down_full_slim_plot, width = 5, height = 35)
SharedGOterms = # of GO terms within the parent term that are in the list for “Down”, “Up”, or “Down, Up”
result_parent_unique <- goterms_shared %>%
group_by(ParentTerm,Factor) %>%
dplyr::summarise(SharedGOterms = n_distinct(GOterm)) %>%
arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
parent_filtered_up <- result_parent_unique %>%
dplyr::filter(Factor=="Up" | Factor=="Down, Up")
#dplyr::filter(SharedGOterms>=5)
parent_filtered_down <- result_parent_unique %>%
dplyr::filter(Factor=="Down" | Factor=="Down, Up")
#dplyr::filter(SharedGOterms>=5)
result_up <- cal_up_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term))%>%
mutate(Calcification.direction = "Up")
head(result_up)
## # A tibble: 6 × 3
## ParentTerm Number.of.terms Calcification.direct…¹
## <chr> <int> <chr>
## 1 ATP biosynthetic process 5 Up
## 2 Arp2/3 complex-mediated actin nucleati… 16 Up
## 3 DNA dealkylation involved in DNA repair 6 Up
## 4 Golgi organization 2 Up
## 5 IRE1-mediated unfolded protein response 16 Up
## 6 P granule assembly 1 Up
## # ℹ abbreviated name: ¹Calcification.direction
dim(result_up)
## [1] 78 3
merged_up <- parent_filtered_up %>%
tidyr::separate_rows(ParentTerm, sep = ", ") %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
left_join(result_up, by = "ParentTerm") #join with result from above to get the Number.of.Terms column
merged_up_clean <- na.omit(merged_up)
head(merged_up_clean)
## # A tibble: 6 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 ATP biosynthetic … 5 5 Up
## 2 Arp2/3 complex-me… 16 16 Up
## 3 DNA dealkylation … 6 6 Up
## 4 Golgi organization 2 2 Up
## 5 IRE1-mediated unf… 16 16 Up
## 6 P granule assembly 1 1 Up
## # ℹ abbreviated name: ¹Calcification.direction
dim(merged_up_clean)
## [1] 78 4
result_down <- cal_down_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term))%>%
mutate(Calcification.direction = "Down")
dim(result_down)
## [1] 43 3
head(result_down)
## # A tibble: 6 × 3
## ParentTerm Number.of.terms Calcification.direct…¹
## <chr> <int> <chr>
## 1 ATP biosynthetic process 24 Down
## 2 Arp2/3 complex-mediated actin nucleati… 2 Down
## 3 IRE1-mediated unfolded protein response 2 Down
## 4 NADH regeneration 1 Down
## 5 P granule assembly 1 Down
## 6 biological process involved in intersp… 1 Down
## # ℹ abbreviated name: ¹Calcification.direction
merged_down <- parent_filtered_down %>%
tidyr::separate_rows(ParentTerm, sep = ", ") %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
left_join(result_down, by = "ParentTerm")
merged_down_clean<- na.omit(merged_down)
head(merged_down_clean)
## # A tibble: 6 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 ATP biosynthetic … 24 24 Down
## 2 Arp2/3 complex-me… 2 2 Down
## 3 IRE1-mediated unf… 2 2 Down
## 4 NADH regeneration 1 1 Down
## 5 P granule assembly 1 1 Down
## 6 biological proces… 1 1 Down
## # ℹ abbreviated name: ¹Calcification.direction
cal_freq_terms_filtered_up <- merged_up_clean %>%
filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Up")
dim(cal_freq_terms_filtered_up)
## [1] 18 4
#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), group=1))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
cal_freq_terms_filtered_down <- merged_down_clean %>%
filter(Number.of.terms>=10)
#filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 7 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 ATP biosynthetic … 24 24 Down
## 2 biosynthetic proc… 15 15 Down
## 3 metabolic process 13 13 Down
## 4 organic acid meta… 12 12 Down
## 5 peptidyl-serine p… 13 13 Down
## 6 proteasome-mediat… 24 24 Down
## 7 protein transport 13 13 Down
## # ℹ abbreviated name: ¹Calcification.direction
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
#geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
#scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs
cal_freq_terms_filtered_up <- merged_up_clean %>%
filter(Number.of.terms>=20) %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 4 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 positive regulati… 24 24 Up
## 2 regulation of cal… 22 22 Up
## 3 regulation of cat… 21 21 Up
## 4 regulation of mac… 20 20 Up
## # ℹ abbreviated name: ¹Calcification.direction
#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_gr20.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
cal_freq_terms_filtered_down <- merged_down_clean %>%
filter(Number.of.terms>=20)
#filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 ATP biosynthetic … 24 24 Down
## 2 proteasome-mediat… 24 24 Down
## # ℹ abbreviated name: ¹Calcification.direction
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
#geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
#scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_gr20.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs
cal_freq_terms_filtered_up_all <- merged_up_clean %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up_all
## # A tibble: 78 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 ATP biosynthetic… 5 5 Up
## 2 Arp2/3 complex-m… 16 16 Up
## 3 DNA dealkylation… 6 6 Up
## 4 Golgi organizati… 2 2 Up
## 5 IRE1-mediated un… 16 16 Up
## 6 P granule assemb… 1 1 Up
## 7 abscission 1 1 Up
## 8 biological proce… 3 3 Up
## 9 cell communicati… 4 4 Up
## 10 cell differentia… 8 8 Up
## # ℹ 68 more rows
## # ℹ abbreviated name: ¹Calcification.direction
#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_ALL.pdf", plot=freq_fig_up, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image
cal_freq_terms_filtered_down_all <- merged_down_clean %>%
filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down_all
## # A tibble: 42 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 ATP biosynthetic… 24 24 Down
## 2 Arp2/3 complex-m… 2 2 Down
## 3 IRE1-mediated un… 2 2 Down
## 4 NADH regeneration 1 1 Down
## 5 P granule assemb… 1 1 Down
## 6 biological proce… 1 1 Down
## 7 biosynthetic pro… 15 15 Down
## 8 cell communicati… 3 3 Down
## 9 cellular aldehyd… 1 1 Down
## 10 cellular macromo… 2 2 Down
## # ℹ 32 more rows
## # ℹ abbreviated name: ¹Calcification.direction
freq_fig_down<-ggplot(cal_freq_terms_filtered_down_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
#geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
#scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_ALL.pdf", plot=freq_fig_down, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, ncol=2, align="h")
compare_figs